Objective: To visualise how the occupations have changed across the three time periods of 1850, 1880 and 1910.
The graphing process showed that there were some inconsistencies with the coding of `occstr`, such as "DRESSMAKER" and "DRESS MAKER" which need to be addressed.
OCC1950 was incorporated to compare a coded variable consistent across all three census years.
library(dplyr)
library(ggplot2)
library(treemap)
library(extrafont)
library(stringr)
loadfonts()
library(knitr)
library(kableExtra)
library(tidyr)
# Loading ----
# Read the 1850 census extracts for each borough, tag every row with its
# city, and read the two occupation code/label lookup files.
mn_1850 = mutate(readr::read_csv("../Data/census_1850_occ_mn.csv"),
                 city = "Manhattan")
bk_1850 = mutate(readr::read_csv("../Data/census_1850_occ_bk.csv"),
                 city = "Brooklyn")
OCC_1880 = readr::read_csv("../Data/OCC1880.csv")
OCC_1950 = readr::read_csv("../Data/OCC1950.csv")

# Combining Data ----
# Stack the two boroughs, move the key columns to the front, attach the
# OCC_1880 columns by occupation code, then turn the numeric codes into
# labelled factors. Codes not listed in `levels` become NA.
combined_1850 = rbind(mn_1850, bk_1850) %>%
  select(age, sex, marst, race, labforce, occ, city, everything()) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  mutate(
    sex = factor(sex,
                 levels = c(1, 2),
                 labels = c("Male", "Female")),
    marst = factor(marst,
                   levels = 1:6,
                   labels = c("Married, spouse present",
                              "Married, spouse absent",
                              "Separated",
                              "Divorced",
                              "Widowed",
                              "Never married/single")),
    # NOTE(review): the label for code 120 may be a typo for
    # "Blank (white)" -- confirm against the census codebook.
    race = factor(race,
                  levels = c(100, 120, 200, 210, 300),
                  labels = c("White", "Black (white)",
                             "Black / African American",
                             "Mulatto", "American Indian")),
    labforce = factor(labforce,
                      levels = c(0, 1, 2),
                      labels = c("N/A",
                                 "No, not in the labor force",
                                 "Yes, in the labor force"))
  )
# Theme setting for visualisations ----
# Default theme for subsequent ggplots: theme_minimal() with black Arial
# text and the minor grid lines removed.
theme_set(
  theme_minimal() +
    theme(
      text = element_text(colour = "black", family = "Arial"),
      panel.grid.minor = element_blank()
    )
)
# Two-colour palettes (not referenced by the code visible in this section).
col_sex = c("#e21737", "#0b648f")
col_miss = c("#86b817", "#e53238")
# Saving Memory ----
# Drop the raw 1850 inputs and the 1880 lookup now that combined_1850 exists.
rm(OCC_1880, bk_1850, mn_1850)
# Loading ----
# Read the 1880 census extracts for each borough, tagged with their city,
# and the 1880 occupation lookup.
mn_1880 = mutate(readr::read_csv("../Data/census_1880_occ_mn.csv"),
                 city = "Manhattan")
bk_1880 = mutate(readr::read_csv("../Data/census_1880_occ_bk.csv"),
                 city = "Brooklyn")
OCC_1880 = readr::read_csv("../Data/OCC1880.csv")

# Combining Data ----
combined_1880 = rbind(mn_1880, bk_1880) %>%
  select(age, sex, race, labforce, occ, city, everything()) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  # Infants carry a text label instead of a number; recode them to 0 so the
  # whole column can be converted to numeric.
  mutate(age = as.numeric(ifelse(age == "Less than 1 year old", 0, age)))

# Drop the raw inputs once combined_1880 has been built.
rm(OCC_1880, bk_1880, mn_1880)
# Loading ----
# Read the 1910 census extracts for each borough, tagged with their city,
# and the OCC1950 lookup.
mn_1910 = mutate(readr::read_csv("../Data/census_1910_occ_mn.csv"),
                 city = "Manhattan")
bk_1910 = mutate(readr::read_csv("../Data/census_1910_occ_bk.csv"),
                 city = "Brooklyn")
OCC_1950 = readr::read_csv("../Data/OCC1950.csv")

# Combining Data ----
# Stack the boroughs, attach the OCC1950 columns by code and label the
# coded sex / labour-force columns (the 1910 column is `labor_force`).
combined_1910 = rbind(mn_1910, bk_1910) %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  mutate(
    labor_force = factor(labor_force,
                         levels = c(0, 1, 2),
                         labels = c("N/A",
                                    "No, not in the labor force",
                                    "Yes, in the labor force")),
    sex = factor(sex,
                 levels = c(1, 2),
                 labels = c("Male", "Female"))
  )

# Drop the raw inputs once combined_1910 has been built.
rm(OCC_1950, mn_1910, bk_1910)
This section investigated how occupations have changed across the three time periods of 1850, 1880 and 1910. To this end, occstr was preferred over label due to the consistent categorising of occupations.
Again, as with the previous section of Occupation counts by year, only respondents in the “Yes, in the labor force” level of labforce were included.
Specifically, the following tabs investigate the top 10 occupations of each time period, and a list of them is shown in the next tab.
The table below shows the top 10 jobs in each time period.
# Top 10 occupation strings per census year ----
# For each year: keep respondents in the labour force, tally `occstr`,
# order by descending frequency and keep the `occstr` column of rows 1-10.
top10_1850 = combined_1850 %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occstr, sort = TRUE) %>%
  .[1:10, "occstr"]
top10_1880 = combined_1880 %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occstr, sort = TRUE) %>%
  .[1:10, "occstr"]
# The 1910 data names the column `labor_force`; missing strings are dropped.
top10_1910 = combined_1910 %>%
  filter(labor_force == "Yes, in the labor force",
         !is.na(occstr)) %>%
  count(occstr, sort = TRUE) %>%
  .[1:10, "occstr"]

# Side-by-side table of the three rankings, one column per census year.
kable(cbind(top10_1850, top10_1880, top10_1910),
      col.names = c("1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(height = "300px", width = "100%")
| 1850 | 1880 | 1910 |
|---|---|---|
| LABORER | LABORER | LABORER |
| CLERK | SERVANT | CLERK |
| TAILOR | CLERK IN STORE | OPERATOR |
| CARPENTER | TAILOR | SALESMAN |
| MERCHANT | CARPENTER | SERVANT |
| SEAMAN | CLERK | TAILOR |
| GROCER | DRESS MAKER | DRIVER |
| MASON | BUTCHER | DRESSMAKER |
| SHOEMAKER | PAINTER | CARPENTER |
| CARTMAN | DRESSMAKER | STENOGRAPHER |
From the table of top 10 occupations, we can see that aside from the top jobs being laborers and clerks, there is an inconsistency with the coding of occstr in 1880, such as “DRESSMAKER” and “DRESS MAKER”.
Subsequently, the code was re-run to count Dress Maker and Dressmaker together, along with Clerk and Clerk in store. On this run, further amendments were made, such as counting Domestic Servant and Servant together. The code is not shown, but the new results are:
| 1850 | 1880 | 1910 |
|---|---|---|
| LABORER | SERVANT | LABORER |
| CLERK | LABORER | CLERK |
| TAILOR | CLERK | OPERATOR |
| CARPENTER | DRESSMAKER | SALESMAN |
| MERCHANT | TAILOR | SERVANT |
| SEAMAN | CARPENTER | TAILOR |
| GROCER | BUTCHER | DRIVER |
| MASON | PAINTER | DRESSMAKER |
| SHOEMAKER | PRINTER | CARPENTER |
| CARTMAN | CIGAR MAKER | STENOGRAPHER |
# Counts per census year for every occupation listed in `top10_years`.
# Each yearly tally is restricted to respondents in the labour force so the
# plotted counts use the same population as the top-10 ranking.
# NOTE(review): `top10_years` is not defined in the code shown here; it is
# assumed to be a data frame whose only column is `occstr` -- confirm.
top10_combined = top10_years %>%
  left_join(combined_1850 %>%
              filter(labforce == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr),
            by = "occstr") %>%
  left_join(combined_1880 %>%
              filter(labforce == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr),
            by = "occstr") %>%
  left_join(combined_1910 %>%
              filter(labor_force == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr),
            by = "occstr") %>%
  # The three joins leave the yearly counts in n.x (1850), n.y (1880), n (1910).
  select(occupation = occstr,
         t_1850 = n.x,
         t_1880 = n.y,
         t_1910 = n) %>%
  # Long format: one row per occupation and year, stacked in year order.
  gather(key = "year", value = "count", -occupation) %>%
  mutate(year = case_when(year == "t_1850" ~ 1850,
                          year == "t_1880" ~ 1880,
                          year == "t_1910" ~ 1910),
         # An occupation absent in a given year has a count of zero.
         count = ifelse(is.na(count),
                        0,
                        count))
# Adding logical of if occupation was a top 10 for that particular year
top10_by_year = rbind(cbind(top10_1850, year = 1850),
                      cbind(top10_1880, year = 1880),
                      cbind(top10_1910, year = 1910)) %>%
  select(occupation = occstr, year) %>%
  mutate(top = 1)
top10_combined = top10_combined %>%
  left_join(top10_by_year, by = c("occupation", "year")) %>%
  mutate(top = ifelse(is.na(top),
                      0,
                      1))
# Occupation-by-year table of the top-10 flag (1 = in that year's top 10).
top10_combined %>%
  select(occupation, year, top) %>%
  spread(key = year, value = top) %>%
  kable(col.names = c("occupation", "1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "300px")
| occupation | 1850 | 1880 | 1910 |
|---|---|---|---|
| BUTCHER | 0 | 1 | 0 |
| CARPENTER | 1 | 1 | 1 |
| CARTMAN | 1 | 0 | 0 |
| CIGAR MAKER | 0 | 1 | 0 |
| CLERK | 1 | 1 | 1 |
| DRESSMAKER | 0 | 1 | 1 |
| DRIVER | 0 | 0 | 1 |
| GROCER | 1 | 0 | 0 |
| LABORER | 1 | 1 | 1 |
| MASON | 1 | 0 | 0 |
| MERCHANT | 1 | 0 | 0 |
| OPERATOR | 0 | 0 | 1 |
| PAINTER | 0 | 1 | 0 |
| PRINTER | 0 | 1 | 0 |
| SALESMAN | 0 | 0 | 1 |
| SEAMAN | 1 | 0 | 0 |
| SERVANT | 0 | 1 | 1 |
| SHOEMAKER | 1 | 0 | 0 |
| STENOGRAPHER | 0 | 0 | 1 |
| TAILOR | 1 | 1 | 1 |
# One colour per census year (1850, 1880, 1910).
col_3 = c("#ffa600", "#bc5090", "#003f5c")
# Dot plot: one row per occupation, one point per census year, joined by a
# line; occupations are ordered by count and point size follows the count.
ggplot(top10_combined,
       aes(x = count,
           y = reorder(occupation, count),
           group = occupation,
           col = factor(year))) +
  geom_point(aes(size = count),
             show.legend = FALSE,
             alpha = 0.8) +
  geom_line() +
  scale_radius() +
  scale_x_continuous(breaks = seq(0, 100000, 20000)) +
  scale_color_manual(values = col_3) +
  theme(legend.position = "top") +
  labs(title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Size of points vary on count",
       x = "Count", y = "Occupations", col = "Year")
# Faceted line chart: one panel per occupation with its count in each census
# year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # shape = factor(top): the two levels (0/1) get the default circle/triangle.
  geom_point(aes(col = trend,
                 shape = factor(top)),
             size = 2) +
  geom_line(aes(col = trend)) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_colour_manual(values = c("decline" = "#ee4c58",
                                 "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Count", x = "Year",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Triangle points were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
# Faceted bar chart: one panel per occupation with its count in each census
# year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # Bar outline encodes the top-10 flag: white for 0, black for 1.
  geom_col(aes(fill = trend,
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  scale_color_manual(values = c("0" = "white", "1" = "black")) +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_fill_manual(values = c("decline" = "#ee4c58",
                               "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Count", x = "Year",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
This section investigated how occupations have changed across the three time periods of 1850, 1880 and 1910. To this end, occstr was preferred over label due to the consistent categorising of occupations.
Again, as with the previous section of Occupation counts by year, only respondents in the “Yes, in the labor force” level of labforce were included.
Specifically, the following tabs investigate the top 10 occupations of each time period, and a list of them is shown in the next tab.
The table below shows the top 10 jobs in each time period.
# Top 10 occupation strings per census year ----
# For each year: keep respondents in the labour force, tally `occstr` in
# descending order, add each occupation's share of the labour force
# (perc_lf) and of all rows (perc_pop), then keep the `occstr` column of
# rows 1-10. The percentage columns are dropped by that final subsetting.
top10_1850 = combined_1850 %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occstr, sort = TRUE) %>%
  mutate(
    perc_lf = round(
      n / sum(combined_1850$labforce == "Yes, in the labor force") * 100,
      digits = 2
    ),
    perc_pop = round(n / NROW(combined_1850) * 100, digits = 2)
  ) %>%
  .[1:10, "occstr"]
top10_1880 = combined_1880 %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occstr, sort = TRUE) %>%
  mutate(
    perc_lf = round(
      n / sum(combined_1880$labforce == "Yes, in the labor force") * 100,
      digits = 2
    ),
    perc_pop = round(n / NROW(combined_1880) * 100, digits = 2)
  ) %>%
  .[1:10, "occstr"]
# The 1910 data names the column `labor_force`; missing strings are dropped.
top10_1910 = combined_1910 %>%
  filter(labor_force == "Yes, in the labor force",
         !is.na(occstr)) %>%
  count(occstr, sort = TRUE) %>%
  mutate(
    perc_lf = round(
      n / sum(combined_1910$labor_force == "Yes, in the labor force") * 100,
      digits = 2
    ),
    perc_pop = round(n / NROW(combined_1910) * 100, digits = 2)
  ) %>%
  .[1:10, "occstr"]

# Side-by-side table of the three rankings, one column per census year.
kable(cbind(top10_1850, top10_1880, top10_1910),
      col.names = c("1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(height = "300px", width = "100%")
| 1850 | 1880 | 1910 |
|---|---|---|
| LABORER | LABORER | LABORER |
| CLERK | SERVANT | CLERK |
| TAILOR | CLERK IN STORE | OPERATOR |
| CARPENTER | TAILOR | SALESMAN |
| MERCHANT | CARPENTER | SERVANT |
| SEAMAN | CLERK | TAILOR |
| GROCER | DRESS MAKER | DRIVER |
| MASON | BUTCHER | DRESSMAKER |
| SHOEMAKER | PAINTER | CARPENTER |
| CARTMAN | DRESSMAKER | STENOGRAPHER |
From the table of top 10 occupations, we can see that aside from the top jobs being laborers and clerks, there is an inconsistency with the coding of occstr in 1880, such as “DRESSMAKER” and “DRESS MAKER”.
Subsequently, the code was re-run to count Dress Maker and Dressmaker together, along with Clerk and Clerk in store. On this run, further amendments were made, such as counting Domestic Servant and Servant together. The code is not shown, but the new results are:
| 1850 | 1880 | 1910 |
|---|---|---|
| LABORER | SERVANT | LABORER |
| CLERK | LABORER | CLERK |
| TAILOR | CLERK | OPERATOR |
| CARPENTER | DRESSMAKER | SALESMAN |
| MERCHANT | TAILOR | SERVANT |
| SEAMAN | CARPENTER | TAILOR |
| GROCER | BUTCHER | DRIVER |
| MASON | PAINTER | DRESSMAKER |
| SHOEMAKER | PRINTER | CARPENTER |
| CARTMAN | CIGAR MAKER | STENOGRAPHER |
# Share of the labour force per census year for every occupation listed in
# `top10_years`. The numerator is restricted to respondents in the labour
# force so it is a subset of the denominator, and the denominators ignore
# missing labour-force values instead of turning every percentage into NA.
# NOTE(review): `top10_years` is not defined in the code shown here; it is
# assumed to be a data frame whose only column is `occstr` -- confirm.
top10_combined = top10_years %>%
  left_join(combined_1850 %>%
              filter(labforce == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr) %>%
              mutate(perc_lf = round(n / sum(combined_1850$labforce == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1850) * 100,
                                      digits = 2)),
            by = "occstr") %>%
  left_join(combined_1880 %>%
              filter(labforce == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr) %>%
              mutate(perc_lf = round(n / sum(combined_1880$labforce == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1880) * 100,
                                      digits = 2)),
            by = "occstr") %>%
  left_join(combined_1910 %>%
              filter(labor_force == "Yes, in the labor force",
                     occstr %in% top10_years$occstr) %>%
              count(occstr) %>%
              mutate(perc_lf = round(n / sum(combined_1910$labor_force == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1910) * 100,
                                      digits = 2)),
            by = "occstr") %>%
  # The three joins leave the yearly shares in perc_lf.x (1850),
  # perc_lf.y (1880) and perc_lf (1910).
  select(occupation = occstr,
         perc_lf_1850 = perc_lf.x,
         perc_lf_1880 = perc_lf.y,
         perc_lf_1910 = perc_lf) %>%
  # Long format; the value column keeps the name `count` although it now
  # holds percentages, because the plotting code refers to `count`.
  gather(key = "year", value = "count", -occupation) %>%
  mutate(year = case_when(year == "perc_lf_1850" ~ 1850,
                          year == "perc_lf_1880" ~ 1880,
                          year == "perc_lf_1910" ~ 1910),
         # An occupation absent in a given year has a share of zero.
         count = ifelse(is.na(count),
                        0,
                        count))
# Adding logical of if occupation was a top 10 for that particular year
top10_by_year = rbind(cbind(top10_1850, year = 1850),
                      cbind(top10_1880, year = 1880),
                      cbind(top10_1910, year = 1910)) %>%
  select(occupation = occstr, year) %>%
  mutate(top = 1)
top10_combined = top10_combined %>%
  left_join(top10_by_year, by = c("occupation", "year")) %>%
  mutate(top = ifelse(is.na(top),
                      0,
                      1))
# Occupation-by-year table of the top-10 flag (1 = in that year's top 10).
top10_combined %>%
  select(occupation, year, top) %>%
  spread(key = year, value = top) %>%
  kable(col.names = c("occupation", "1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "300px")
| occupation | 1850 | 1880 | 1910 |
|---|---|---|---|
| BUTCHER | 0 | 1 | 0 |
| CARPENTER | 1 | 1 | 1 |
| CARTMAN | 1 | 0 | 0 |
| CIGAR MAKER | 0 | 1 | 0 |
| CLERK | 1 | 1 | 1 |
| DRESSMAKER | 0 | 1 | 1 |
| DRIVER | 0 | 0 | 1 |
| GROCER | 1 | 0 | 0 |
| LABORER | 1 | 1 | 1 |
| MASON | 1 | 0 | 0 |
| MERCHANT | 1 | 0 | 0 |
| OPERATOR | 0 | 0 | 1 |
| PAINTER | 0 | 1 | 0 |
| PRINTER | 0 | 1 | 0 |
| SALESMAN | 0 | 0 | 1 |
| SEAMAN | 1 | 0 | 0 |
| SERVANT | 0 | 1 | 1 |
| SHOEMAKER | 1 | 0 | 0 |
| STENOGRAPHER | 0 | 0 | 1 |
| TAILOR | 1 | 1 | 1 |
# One colour per census year (1850, 1880, 1910).
col_3 = c("#ffa600", "#bc5090", "#003f5c")
# Dot plot: one row per occupation, one point per census year, joined by a
# line. `count` holds the percent of the labour force in this section, so the
# title and caption refer to percentages rather than counts.
top10_combined %>%
  ggplot(aes(y = reorder(occupation, count),
             x = count,
             col = factor(year),
             group = occupation)) +
  geom_point(aes(size = count),
             alpha = 0.8,
             show.legend = FALSE) +
  geom_line() +
  scale_color_manual(values = col_3) +
  scale_radius() +
  scale_x_continuous() +
  labs(y = "Occupations", col = "Year", x = "Percent of Labor Force",
       title = "Graph of Occupation Percentages in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Size of points vary on percentage") +
  theme(legend.position = "top")
# Faceted line chart: one panel per occupation with its percent of the labour
# force in each census year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # shape = factor(top): the two levels (0/1) get the default circle/triangle.
  geom_point(aes(col = trend,
                 shape = factor(top)),
             size = 2) +
  geom_line(aes(col = trend)) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_colour_manual(values = c("decline" = "#ee4c58",
                                 "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Percent of Labor Force", x = "Year",
       title = "Graph of Occupation Percentages in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Triangle points were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
# Faceted bar chart: one panel per occupation with its percent of the labour
# force in each census year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # Bar outline encodes the top-10 flag: white for 0, black for 1.
  geom_col(aes(fill = trend,
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  scale_color_manual(values = c("0" = "white", "1" = "black")) +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_fill_manual(values = c("decline" = "#ee4c58",
                               "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Percent of Labor Force", x = "Year",
       title = "Graph of Occupation Percentages in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
This section investigated how occupations have changed across the three time periods of 1850, 1880 and 1910. OCC1950 was used in this section to contrast the use of occstr.
Again, as with the previous section of Occupation counts by year, only respondents in the “Yes, in the labor force” level of labforce were included.
Specifically, the following tabs investigate the top 10 occupations of each time period, and a list of them is shown in the next tab.
The table below shows the top 10 jobs in each time period.
# OCC1950 lookup: numeric `code` and text `label` (see the parse message).
OCC_1950 = readr::read_csv("../Data/OCC1950.csv")
## Parsed with column specification:
## cols(
## code = col_double(),
## label = col_character()
## )

# Top 10 OCC1950 categories per census year ----
# 1850 and 1910 hold OCC1950 codes, so the label comes from the lookup; it
# is selected as `label.y`, the suffixed name the join gives the lookup's
# `label` column.
top10_1850 = combined_1850 %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(label.y, sort = TRUE) %>%
  .[1:10, "label.y"]
# In 1880 `occ1950` is joined to the lookup's `label` column and is tallied
# directly.
top10_1880 = combined_1880 %>%
  left_join(OCC_1950, by = c("occ1950" = "label")) %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occ1950, sort = TRUE) %>%
  .[1:10, "occ1950"]
# The 1910 data names the column `labor_force`; missing codes are dropped.
top10_1910 = combined_1910 %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  filter(labor_force == "Yes, in the labor force",
         !is.na(occ1950)) %>%
  count(label.y, sort = TRUE) %>%
  .[1:10, "label.y"]

# Side-by-side table of the three rankings, one column per census year.
kable(cbind(top10_1850, top10_1880, top10_1910),
      col.names = c("1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(height = "300px", width = "100%")
| 1850 | 1880 | 1910 |
|---|---|---|
| Laborers (n.e.c.) | Operative and kindred workers (n.e.c.) | Not yet classified |
| Managers, officials, and proprietors (n.e.c.) | Managers, officials, and proprietors (n.e.c.) | Operative and kindred workers (n.e.c.) |
| Operative and kindred workers (n.e.c.) | Private household workers (n.e.c.) | Managers, officials, and proprietors (n.e.c.) |
| Salesmen and sales clerks (n.e.c.) | Laborers (n.e.c.) | Salesmen and sales clerks (n.e.c.) |
| Carpenters | Salesmen and sales clerks (n.e.c.) | Private household workers (n.e.c.) |
| Tailors and tailoresses | Dressmakers and seamstresses, except factory | Laborers (n.e.c.) |
| Sailors and deck hands | Tailors and tailoresses | Clerical and kindred workers (n.e.c.) |
| Brickmasons, stonemasons, and tile setters | Carpenters | Tailors and tailoresses |
| Truck and tractor drivers | Truck and tractor drivers | Dressmakers and seamstresses, except factory |
| Cabinetmakers | Clerical and kindred workers (n.e.c.) | Bookkeepers |
| 1850 | 1880 | 1910 |
|---|---|---|
| Laborers (n.e.c.) | Operative and kindred workers (n.e.c.) | Not yet classified |
| Managers, officials, and proprietors (n.e.c.) | Managers, officials, and proprietors (n.e.c.) | Operative and kindred workers (n.e.c.) |
| Operative and kindred workers (n.e.c.) | Private household workers (n.e.c.) | Managers, officials, and proprietors (n.e.c.) |
| Salesmen and sales clerks (n.e.c.) | Laborers (n.e.c.) | Salesmen and sales clerks (n.e.c.) |
| Carpenters | Salesmen and sales clerks (n.e.c.) | Private household workers (n.e.c.) |
| Tailors and tailoresses | Dressmakers and seamstresses, except factory | Laborers (n.e.c.) |
| Sailors and deck hands | Tailors and tailoresses | Clerical and kindred workers (n.e.c.) |
| Brickmasons, stonemasons, and tile setters | Carpenters | Tailors and tailoresses |
| Truck and tractor drivers | Truck and tractor drivers | Dressmakers and seamstresses, except factory |
| Cabinetmakers | Clerical and kindred workers (n.e.c.) | Bookkeepers |
# Counts per census year for every OCC1950 category listed in `top10_years`.
# Each yearly tally is restricted to respondents in the labour force so the
# plotted counts use the same population as the top-10 ranking.
# NOTE(review): `top10_years` is not defined in the code shown here; it is
# assumed to be a data frame whose only column is `label.y` -- confirm.
top10_combined = top10_years %>%
  left_join(combined_1850 %>%
              left_join(OCC_1950, by = c("occ1950" = "code")) %>%
              filter(labforce == "Yes, in the labor force",
                     label.y %in% top10_years$label.y) %>%
              count(label.y),
            by = "label.y") %>%
  # In 1880 `occ1950` is matched against the category labels directly.
  left_join(combined_1880 %>%
              left_join(OCC_1950, by = c("occ1950" = "label")) %>%
              filter(labforce == "Yes, in the labor force",
                     occ1950 %in% top10_years$label.y) %>%
              count(occ1950),
            by = c("label.y" = "occ1950")) %>%
  left_join(combined_1910 %>%
              left_join(OCC_1950, by = c("occ1950" = "code")) %>%
              filter(labor_force == "Yes, in the labor force",
                     label.y %in% top10_years$label.y) %>%
              count(label.y),
            by = "label.y") %>%
  # The three joins leave the yearly counts in n.x (1850), n.y (1880), n (1910).
  select(occupation = label.y,
         t_1850 = n.x,
         t_1880 = n.y,
         t_1910 = n) %>%
  # Long format: one row per occupation and year, stacked in year order.
  gather(key = "year", value = "count", -occupation) %>%
  mutate(year = case_when(year == "t_1850" ~ 1850,
                          year == "t_1880" ~ 1880,
                          year == "t_1910" ~ 1910),
         # A category absent in a given year has a count of zero.
         count = ifelse(is.na(count),
                        0,
                        count))
# Adding logical of if occupation was a top 10 for that particular year
top10_by_year = rbind(cbind(top10_1850, year = 1850),
                      cbind(top10_1880 %>%
                              rename(label.y = occ1950), year = 1880),
                      cbind(top10_1910, year = 1910)) %>%
  select(occupation = label.y, year) %>%
  mutate(top = 1)
top10_combined = top10_combined %>%
  left_join(top10_by_year, by = c("occupation", "year")) %>%
  mutate(top = ifelse(is.na(top),
                      0,
                      1))
# Occupation-by-year table of the top-10 flag (1 = in that year's top 10).
top10_combined %>%
  select(occupation, year, top) %>%
  spread(key = year, value = top) %>%
  kable(col.names = c("occupation", "1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "300px")
| occupation | 1850 | 1880 | 1910 |
|---|---|---|---|
| Bookkeepers | 0 | 0 | 1 |
| Brickmasons, stonemasons, and tile setters | 1 | 0 | 0 |
| Cabinetmakers | 1 | 0 | 0 |
| Carpenters | 1 | 1 | 0 |
| Clerical and kindred workers (n.e.c.) | 0 | 1 | 1 |
| Dressmakers and seamstresses, except factory | 0 | 1 | 1 |
| Laborers (n.e.c.) | 1 | 1 | 1 |
| Managers, officials, and proprietors (n.e.c.) | 1 | 1 | 1 |
| Not yet classified | 0 | 0 | 1 |
| Operative and kindred workers (n.e.c.) | 1 | 1 | 1 |
| Private household workers (n.e.c.) | 0 | 1 | 1 |
| Sailors and deck hands | 1 | 0 | 0 |
| Salesmen and sales clerks (n.e.c.) | 1 | 1 | 1 |
| Tailors and tailoresses | 1 | 1 | 1 |
| Truck and tractor drivers | 1 | 1 | 0 |
# One colour per census year (1850, 1880, 1910).
col_3 = c("#ffa600", "#bc5090", "#003f5c")
# Dot plot: one row per OCC1950 category, one point per census year, joined
# by a line; categories are ordered by count and point size follows the count.
ggplot(top10_combined,
       aes(x = count,
           y = reorder(occupation, count),
           group = occupation,
           col = factor(year))) +
  geom_point(aes(size = count),
             show.legend = FALSE,
             alpha = 0.8) +
  geom_line() +
  scale_radius() +
  # Thousands separators on the count axis.
  scale_x_continuous(labels = scales::comma) +
  scale_color_manual(values = col_3) +
  theme(legend.position = "top") +
  labs(title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Size of points vary on count",
       x = "Count", y = "Occupations", col = "Year")
# Faceted line chart: one panel per OCC1950 category with its count in each
# census year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # shape = factor(top): the two levels (0/1) get the default circle/triangle.
  geom_point(aes(col = trend,
                 shape = factor(top)),
             size = 2) +
  geom_line(aes(col = trend)) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_colour_manual(values = c("decline" = "#ee4c58",
                                 "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Count", x = "Year",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Triangle points were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
# Faceted bar chart: one panel per OCC1950 category with its count in each
# census year; panels are ordered by the 1850-to-1910 change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # Bar outline encodes the top-10 flag: white for 0, black for 1.
  geom_col(aes(fill = trend,
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  scale_color_manual(values = c("0" = "white", "1" = "black")) +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_fill_manual(values = c("decline" = "#ee4c58",
                               "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Count", x = "Year",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
This section investigated how occupations have changed across the three time periods of 1850, 1880 and 1910. OCC1950 was used in this section to contrast the use of occstr.
Again, as with the previous section of Occupation counts by year, only respondents in the “Yes, in the labor force” level of labforce were included.
Specifically, the following tabs investigate the top 10 occupations of each time period, and a list of them is shown in the next tab.
The table below shows the top 10 jobs in each time period.
# OCC1950 lookup: numeric `code` and text `label` (see the parse message).
OCC_1950 = readr::read_csv("../Data/OCC1950.csv")
## Parsed with column specification:
## cols(
## code = col_double(),
## label = col_character()
## )
# Top 10 OCC1950 categories per census year. For each year: keep respondents
# in the labour force, tally the category, add its share of that year's
# labour force (perc_lf) and of that year's rows (perc_pop), order by
# frequency and keep the category column of rows 1-10.
# Every denominator comes from the same year as the numerator and ignores
# missing labour-force values.
top10_1850 = combined_1850 %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(label.y) %>%
  mutate(perc_lf = round(n / sum(combined_1850$labforce == "Yes, in the labor force",
                                 na.rm = TRUE) * 100,
                         digits = 2),
         perc_pop = round(n / NROW(combined_1850) * 100,
                          digits = 2)) %>%
  arrange(desc(n)) %>%
  .[1:10, "label.y"]
# In 1880 `occ1950` is joined to the lookup's `label` column and is tallied
# directly; the shares are taken against the 1880 data.
top10_1880 = combined_1880 %>%
  left_join(OCC_1950, by = c("occ1950" = "label")) %>%
  filter(labforce == "Yes, in the labor force") %>%
  count(occ1950) %>%
  mutate(perc_lf = round(n / sum(combined_1880$labforce == "Yes, in the labor force",
                                 na.rm = TRUE) * 100,
                         digits = 2),
         perc_pop = round(n / NROW(combined_1880) * 100,
                          digits = 2)) %>%
  arrange(desc(n)) %>%
  .[1:10, "occ1950"]
# The 1910 data names the column `labor_force`; missing codes are dropped.
top10_1910 = combined_1910 %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  filter(labor_force == "Yes, in the labor force" &
           !is.na(occ1950)) %>%
  count(label.y) %>%
  mutate(perc_lf = round(n / sum(combined_1910$labor_force == "Yes, in the labor force",
                                 na.rm = TRUE) * 100,
                         digits = 2),
         perc_pop = round(n / NROW(combined_1910) * 100,
                          digits = 2)) %>%
  arrange(desc(n)) %>%
  .[1:10, "label.y"]
# Side-by-side table of the three rankings, one column per census year.
cbind(top10_1850, top10_1880, top10_1910) %>%
  kable(col.names = c("1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "300px")
| 1850 | 1880 | 1910 |
|---|---|---|
| Laborers (n.e.c.) | Operative and kindred workers (n.e.c.) | Not yet classified |
| Managers, officials, and proprietors (n.e.c.) | Managers, officials, and proprietors (n.e.c.) | Operative and kindred workers (n.e.c.) |
| Operative and kindred workers (n.e.c.) | Private household workers (n.e.c.) | Managers, officials, and proprietors (n.e.c.) |
| Salesmen and sales clerks (n.e.c.) | Laborers (n.e.c.) | Salesmen and sales clerks (n.e.c.) |
| Carpenters | Salesmen and sales clerks (n.e.c.) | Private household workers (n.e.c.) |
| Tailors and tailoresses | Dressmakers and seamstresses, except factory | Laborers (n.e.c.) |
| Sailors and deck hands | Tailors and tailoresses | Clerical and kindred workers (n.e.c.) |
| Brickmasons, stonemasons, and tile setters | Carpenters | Tailors and tailoresses |
| Truck and tractor drivers | Truck and tractor drivers | Dressmakers and seamstresses, except factory |
| Cabinetmakers | Clerical and kindred workers (n.e.c.) | Bookkeepers |
| 1850 | 1880 | 1910 |
|---|---|---|
| Laborers (n.e.c.) | Operative and kindred workers (n.e.c.) | Not yet classified |
| Managers, officials, and proprietors (n.e.c.) | Managers, officials, and proprietors (n.e.c.) | Operative and kindred workers (n.e.c.) |
| Operative and kindred workers (n.e.c.) | Private household workers (n.e.c.) | Managers, officials, and proprietors (n.e.c.) |
| Salesmen and sales clerks (n.e.c.) | Laborers (n.e.c.) | Salesmen and sales clerks (n.e.c.) |
| Carpenters | Salesmen and sales clerks (n.e.c.) | Private household workers (n.e.c.) |
| Tailors and tailoresses | Dressmakers and seamstresses, except factory | Laborers (n.e.c.) |
| Sailors and deck hands | Tailors and tailoresses | Clerical and kindred workers (n.e.c.) |
| Brickmasons, stonemasons, and tile setters | Carpenters | Tailors and tailoresses |
| Truck and tractor drivers | Truck and tractor drivers | Dressmakers and seamstresses, except factory |
| Cabinetmakers | Clerical and kindred workers (n.e.c.) | Bookkeepers |
# Share of the labour force per census year for every OCC1950 category listed
# in `top10_years`. The numerator is restricted to respondents in the labour
# force so it is a subset of the denominator, and the denominators ignore
# missing labour-force values instead of turning every percentage into NA.
# NOTE(review): `top10_years` is not defined in the code shown here; it is
# assumed to be a data frame whose only column is `label.y` -- confirm.
top10_combined = top10_years %>%
  left_join(combined_1850 %>%
              left_join(OCC_1950, by = c("occ1950" = "code")) %>%
              filter(labforce == "Yes, in the labor force",
                     label.y %in% top10_years$label.y) %>%
              count(label.y) %>%
              mutate(perc_lf = round(n / sum(combined_1850$labforce == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1850) * 100,
                                      digits = 2)),
            by = "label.y") %>%
  # In 1880 `occ1950` is matched against the category labels directly.
  left_join(combined_1880 %>%
              left_join(OCC_1950, by = c("occ1950" = "label")) %>%
              filter(labforce == "Yes, in the labor force",
                     occ1950 %in% top10_years$label.y) %>%
              count(occ1950) %>%
              mutate(perc_lf = round(n / sum(combined_1880$labforce == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1880) * 100,
                                      digits = 2)),
            by = c("label.y" = "occ1950")) %>%
  left_join(combined_1910 %>%
              left_join(OCC_1950, by = c("occ1950" = "code")) %>%
              filter(labor_force == "Yes, in the labor force",
                     label.y %in% top10_years$label.y) %>%
              count(label.y) %>%
              mutate(perc_lf = round(n / sum(combined_1910$labor_force == "Yes, in the labor force",
                                             na.rm = TRUE) * 100,
                                     digits = 2),
                     perc_pop = round(n / NROW(combined_1910) * 100,
                                      digits = 2)),
            by = "label.y") %>%
  # The three joins leave the yearly shares in perc_lf.x (1850),
  # perc_lf.y (1880) and perc_lf (1910).
  select(occupation = label.y,
         perc_lf_1850 = perc_lf.x,
         perc_lf_1880 = perc_lf.y,
         perc_lf_1910 = perc_lf) %>%
  # Long format; the value column keeps the name `count` although it now
  # holds percentages, because the plotting code refers to `count`.
  gather(key = "year", value = "count", -occupation) %>%
  mutate(year = case_when(year == "perc_lf_1850" ~ 1850,
                          year == "perc_lf_1880" ~ 1880,
                          year == "perc_lf_1910" ~ 1910),
         # A category absent in a given year has a share of zero.
         count = ifelse(is.na(count),
                        0,
                        count))
# Adding logical of if occupation was a top 10 for that particular year
top10_by_year = rbind(cbind(top10_1850, year = 1850),
                      cbind(top10_1880 %>%
                              rename(label.y = occ1950), year = 1880),
                      cbind(top10_1910, year = 1910)) %>%
  select(occupation = label.y, year) %>%
  mutate(top = 1)
top10_combined = top10_combined %>%
  left_join(top10_by_year, by = c("occupation", "year")) %>%
  mutate(top = ifelse(is.na(top),
                      0,
                      1))
# Occupation-by-year table of the top-10 flag (1 = in that year's top 10).
top10_combined %>%
  select(occupation, year, top) %>%
  spread(key = year, value = top) %>%
  kable(col.names = c("occupation", "1850", "1880", "1910")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "300px")
| occupation | 1850 | 1880 | 1910 |
|---|---|---|---|
| Bookkeepers | 0 | 0 | 1 |
| Brickmasons, stonemasons, and tile setters | 1 | 0 | 0 |
| Cabinetmakers | 1 | 0 | 0 |
| Carpenters | 1 | 1 | 0 |
| Clerical and kindred workers (n.e.c.) | 0 | 1 | 1 |
| Dressmakers and seamstresses, except factory | 0 | 1 | 1 |
| Laborers (n.e.c.) | 1 | 1 | 1 |
| Managers, officials, and proprietors (n.e.c.) | 1 | 1 | 1 |
| Not yet classified | 0 | 0 | 1 |
| Operative and kindred workers (n.e.c.) | 1 | 1 | 1 |
| Private household workers (n.e.c.) | 0 | 1 | 1 |
| Sailors and deck hands | 1 | 0 | 0 |
| Salesmen and sales clerks (n.e.c.) | 1 | 1 | 1 |
| Tailors and tailoresses | 1 | 1 | 1 |
| Truck and tractor drivers | 1 | 1 | 0 |
# One colour per census year (1850, 1880, 1910).
col_3 = c("#ffa600", "#bc5090", "#003f5c")
# Dot plot: one row per OCC1950 category, one point per census year, joined
# by a line. `count` holds the percent of the labour force in this section,
# so the caption refers to the percentage rather than a count.
top10_combined %>%
  ggplot(aes(y = reorder(occupation, count),
             x = count,
             col = factor(year),
             group = occupation)) +
  geom_point(aes(size = count),
             alpha = 0.8,
             show.legend = FALSE) +
  geom_line() +
  scale_color_manual(values = col_3) +
  scale_radius() +
  scale_x_continuous(labels = scales::comma) +
  labs(y = "Occupations", col = "Year", x = "Percent",
       title = "Graph of Occupation % in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Size of points vary on percentage") +
  theme(legend.position = "top")
# Faceted line chart: one panel per OCC1950 category with its percent of the
# labour force in each census year; panels are ordered by the 1850-to-1910
# change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # shape = factor(top): the two levels (0/1) get the default circle/triangle.
  geom_point(aes(col = trend,
                 shape = factor(top)),
             size = 2) +
  geom_line(aes(col = trend)) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_colour_manual(values = c("decline" = "#ee4c58",
                                 "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Percent", x = "Year",
       title = "Graph of Occupation % in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Triangle points were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
# Faceted bar chart: one panel per OCC1950 category with its percent of the
# labour force in each census year; panels are ordered by the 1850-to-1910
# change.
top10_combined %>%
  group_by(occupation) %>%
  # first()/last() assume each occupation's rows are in year order
  # (1850 first, 1910 last).
  mutate(difference = first(count) - last(count),
         trend = ifelse(difference > 0,
                        "decline",
                        "no decline")) %>%
  ungroup() %>%
  ggplot(aes(y = count, x = year)) +
  # Bar outline encodes the top-10 flag: white for 0, black for 1.
  geom_col(aes(fill = trend,
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occupation, difference),
             scales = "free") +
  scale_color_manual(values = c("0" = "white", "1" = "black")) +
  # Named values keep each trend on its colour even if only one trend occurs.
  scale_fill_manual(values = c("decline" = "#ee4c58",
                               "no decline" = "#56c1ab")) +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  labs(y = "Percent", x = "Year",
       title = "Graph of Occupation % in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders were if the occupation was a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")